##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 5: Comparative Application: Sentiment in European Parliaments
##########################
# Intro
# Start from an empty workspace so objects left over from earlier parts of the
# replication cannot leak into this script.
rm(list = ls())
#install_version("quanteda", version = "1.1.1", repos = "http://cran.us.r-project.org") #all analysis is run on quanteda version 1.1.1
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later at the first call into the package.
library(quanteda)
library(tidyverse)
library(zoo)
library(readstata13)
library(rstudioapi)

# Use the folder of the script open in RStudio as working directory, because
# every input/output file below is addressed by a relative path. Outside
# RStudio, or for a document that has not been saved yet (empty path), the
# current working directory is kept instead of aborting.
if (rstudioapi::isAvailable()) {
  current_path <- getActiveDocumentContext()$path
  if (nzchar(current_path)) {
    setwd(dirname(current_path))
  }
}

# Convert a date into a running month count: 12 * (years since 1900) plus the
# zero-based month of the year. `d` is passed through as.Date() with origin
# "1900-01-01" before being broken into calendar fields.
monnb <- function(d) {
  fields <- as.POSIXlt(as.Date(d, origin = "1900-01-01"))
  12 * fields$year + fields$mon
}
# Number of calendar months from d1 to d2, obtained as the difference of their
# monnb() month counts (positive when d2 falls in a later month than d1).
mondf <- function(d1, d2) {
  from_month <- monnb(d1)
  to_month <- monnb(d2)
  to_month - from_month
}
#https://stackoverflow.com/a/1995984

# Read the minority-government data set and add a readable country name.
min_govs <- read.dta13("5_EU_minority_govs.dta")

# Country names in code order: the name at position k belongs to country == k.
eu_country_names <- c(
  "Belgium", "Bulgaria", "Czech Republic", "Denmark", "Germany", "Estonia",
  "Ireland", "Greece", "Spain", "France", "Croatia", "Italy", "Cyprus",
  "Latvia", "Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands",
  "Austria", "Poland", "Portugal", "Romania", "Slovenia", "Slovakia",
  "Finland", "Sweden", "United Kingdom"
)

# Look every code up by position; codes outside 1..28 (including missing
# values) fall back to "Other".
code_position <- match(min_govs$country, seq_along(eu_country_names))
min_govs$country_name <- eu_country_names[code_position]
min_govs$country_name[is.na(code_position)] <- "Other"
rm(eu_country_names, code_position)


###############################################
# Spain
###############################################

# Build the Spanish corpus from Corp_Congresso.Rdata (the file is expected to
# supply the data frame `cong.corpus`).
load("Corp_Congresso.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
cong.corpus$date <- as.Date(cong.corpus$date)
cong.corpus$speechnumber <- as.character(cong.corpus$speechnumber)
cong.corpus$months <- as.Date(paste0(format(cong.corpus$date, "%Y-%m"), "-01"))

# Drop the "other" party category and keep only rows where chair is FALSE.
cong.corpus <- cong.corpus %>%
  filter(!party %in% c("other")) %>%
  filter(chair == FALSE)

# Readable party names keyed by the party code used in the data; codes that
# are not listed keep a missing label.
party_names <- c(
  "CC" = "Canary Coalition",
  "CDS" = "Democratic and Social Centre",
  "CHA" = "Aragonese Council",
  "CiU" = "Convergence and Union",
  "ERC" = "Republican Left of Catalonia",
  "IP" = "Left of the Peoples",
  "IU" = "United Left",
  "IU-ICV" = "Initiative for Catalonia Greens",
  "PA" = "Andalusian Party",
  "PNV" = "Basque Nationalist Party",
  "PP" = "People's Party",
  "PSOE" = "Socialist Workers' Party",
  "UPyD" = "Union, Progress and Democracy"
)
cong.corpus$party.label <- NA
for (code in names(party_names)) {
  cong.corpus$party.label[cong.corpus$party == code] <- party_names[[code]]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
cong.corpus$Gov_Parties <- "PP"
cong.corpus$Prime_Minister <- "PP"
cabinets <- rbind(
  c("2011/12/21", "PSOE", "PSOE"),
  c("2004/04/17", "PP", "PP"),
  c("1996/05/05", "PSOE", "PSOE")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- cong.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  cong.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  cong.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
cong.corpus$Role <- NA
for (party_code in unique(cong.corpus$party)) {
  is_party <- cong.corpus$party == party_code
  in_cabinet <- grepl(party_code, cong.corpus$Gov_Parties)
  leads_cabinet <- cong.corpus$Prime_Minister == party_code
  cong.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  cong.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  cong.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
cong.corpus$nextelection <- as.Date("2020/06/06", "%Y/%m/%d")
for (election in c("2016/06/26", "2015/12/20", "2011/11/20", "2008/03/09",
                   "2004/03/14", "2000/03/12", "1996/03/03", "1993/06/06")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  cong.corpus$nextelection[cong.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
cong.corpus$time_to_next_election <- cong.corpus$date - cong.corpus$nextelection
cong.corpus$dist_to_next_election_months <- mondf(cong.corpus$months, cong.corpus$nextelection)
cong.corpus$log_dist_to_next_election_months <- -log(cong.corpus$dist_to_next_election_months)
cong.corpus$country_name <- "Spain"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
cong.corpus <- cong.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
cong.corpus$last_year_before_election <- ifelse(cong.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_spain <- corpus(cong.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_spain, file = "5_corpus_spain.RData")
rm(cong.corpus, corpus_spain)
rm(party_names, code, cabinets, k, earlier, party_code, is_party, in_cabinet,
   leads_cabinet, election, election_day)

###############################################
# Finland
###############################################

# Build the Finnish corpus from Corp_Eduskundta.Rdata (the file is expected to
# supply the data frame `ed.corpus`).
load("Corp_Eduskundta.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
ed.corpus$date <- as.Date(ed.corpus$date)
ed.corpus$speechnumber <- as.character(ed.corpus$speechnumber)
ed.corpus$months <- as.Date(paste0(format(ed.corpus$date, "%Y-%m"), "-01"))
ed.corpus$parliament <- "FI-ED"

# Remove the party codes that are excluded from the analysis.
# NOTE(review): unlike the other country sections, no `chair == FALSE` filter
# is applied here - confirm this is intended.
excluded_parties <- c("other", "AlkioistCentristGroup", "GroupAlkiolainen",
                      "GroupPuhjo", "GroupVirtanen", "M11", "NUOR", "va-r", "Vr")
ed.corpus <- ed.corpus %>%
  filter(!party %in% excluded_parties)

# Readable party names keyed by the party code used in the data; codes that
# are not listed keep a missing label.
party_names <- c(
  "KD/SKL" = "Christian Democrats",
  "Kesk" = "Centre Party - Keskusta",
  "Kok" = "National Coalition Party - Kok",
  "LKP" = "Liberal People's Party",
  "PS" = "Finnish Party | True Finns",
  "Remonttiryhmä" = "Reform Group",
  "RKP" = "Swedish People's Party",
  "SDP" = "Social Democratic Party - SSDP",
  "SMP" = "Finnish Rural Party",
  "Vas" = "Left Alliance",
  "Vihreät" = "Greens"
)
ed.corpus$party.label <- NA
for (code in names(party_names)) {
  ed.corpus$party.label[ed.corpus$party == code] <- party_names[[code]]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
ed.corpus$Gov_Parties <- "Kesk, PS, Kok"
ed.corpus$Prime_Minister <- "Kesk"
cabinets <- rbind(
  c("2015/05/29", "Kok, SDP, RKP, KD/SKL", "Kok"),
  c("2014/09/20", "Kok, SDP, RKP, Vihreät, KD/SKL", "Kok"),
  c("2014/03/25", "Kok, SDP, RKP, Vihreät, Vas, KD/SKL", "Kok"),
  c("2011/06/22", "Kesk, Kok, Vihreät, RKP", "Kesk"),
  c("2007/04/19", "Kesk, SDP, RKP", "Kesk"),
  c("2003/04/17", "SDP, Kok, Vas, RKP", "SDP"),
  c("2002/05/31", "SDP, Kok, Vas, Vihreät, RKP", "SDP"),
  c("1995/04/13", "Kesk, Kok, RKP", "Kesk"),
  c("1994/06/26", "Kesk, Kok, RKP, KD/SKL", "Kesk")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- ed.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  ed.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  ed.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
ed.corpus$Role <- NA
for (party_code in unique(ed.corpus$party)) {
  is_party <- ed.corpus$party == party_code
  in_cabinet <- grepl(party_code, ed.corpus$Gov_Parties)
  leads_cabinet <- ed.corpus$Prime_Minister == party_code
  ed.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  ed.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  ed.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
ed.corpus$nextelection <- as.Date("2019/04/14", "%Y/%m/%d")
for (election in c("2015/04/19", "2011/04/17", "2007/03/18", "2003/03/16",
                   "1999/03/21", "1995/03/19", "1991/03/17")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  ed.corpus$nextelection[ed.corpus$date < election_day] <- election_day
}
ed.corpus$time_to_next_election <- ed.corpus$date - ed.corpus$nextelection
# Tabulate speeches per assigned election date, missing values included.
table(ed.corpus$nextelection, exclude = NULL)

# Month distance to the next election and its negated log.
ed.corpus$dist_to_next_election_months <- mondf(ed.corpus$months, ed.corpus$nextelection)
ed.corpus$log_dist_to_next_election_months <- -log(ed.corpus$dist_to_next_election_months)
ed.corpus$country_name <- "Finland"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
ed.corpus <- ed.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
ed.corpus$last_year_before_election <- ifelse(ed.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_finland <- corpus(ed.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_finland, file = "5_corpus_finland.RData")
rm(ed.corpus, corpus_finland)
rm(excluded_parties, party_names, code, cabinets, k, earlier, party_code,
   is_party, in_cabinet, leads_cabinet, election, election_day)

###############################################
# Germany
###############################################

# Build the German corpus from Corp_Bundestag.Rdata (the file is expected to
# supply the data frame `bt.corpus`).
load("Corp_Bundestag.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
bt.corpus$date <- as.Date(bt.corpus$date)
bt.corpus$speechnumber <- as.character(bt.corpus$speechnumber)
bt.corpus$months <- as.Date(paste0(format(bt.corpus$date, "%Y-%m"), "-01"))

# Keep the five listed party groups and only rows where chair is FALSE.
bt.corpus <- bt.corpus %>%
  filter(party %in% c("CDU/CSU", "FDP", "GRUENE", "SPD", "PDS/LINKE")) %>%
  filter(chair == FALSE)

# Readable party names keyed by the party code used in the data.
party_names <- c(
  "CDU/CSU" = "Christian Democrats",
  "FDP" = "Liberals",
  "GRUENE" = "Green Party",
  "SPD" = "Social Democrats",
  "PDS/LINKE" = "The Left"
)
bt.corpus$party.label <- NA
for (code in names(party_names)) {
  bt.corpus$party.label[bt.corpus$party == code] <- party_names[[code]]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
bt.corpus$Gov_Parties <- "CDU/CSU, SPD"
bt.corpus$Prime_Minister <- "CDU/CSU"
cabinets <- rbind(
  c("2013/12/17", "CDU/CSU, FDP", "CDU/CSU"),
  c("2009/10/28", "CDU/CSU, SPD", "CDU/CSU"),
  c("2005/11/22", "SPD, GRUENE", "SPD"),
  c("1998/10/27", "CDU/CSU, FDP", "CDU/CSU")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- bt.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  bt.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  bt.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
# Role is initialised explicitly, as in the Spain and Finland sections, so that
# rows matched by no rule are NA and a Role column already present in the
# loaded data cannot leak through.
bt.corpus$Role <- NA
for (party_code in unique(bt.corpus$party)) {
  is_party <- bt.corpus$party == party_code
  in_cabinet <- grepl(party_code, bt.corpus$Gov_Parties)
  leads_cabinet <- bt.corpus$Prime_Minister == party_code
  bt.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  bt.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  bt.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
bt.corpus$nextelection <- as.Date("2021/09/24", "%Y/%m/%d")
for (election in c("2017/09/24", "2013/09/22", "2009/09/27", "2005/09/18",
                   "2002/09/22", "1998/09/27", "1994/10/16")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  bt.corpus$nextelection[bt.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
bt.corpus$time_to_next_election <- bt.corpus$date - bt.corpus$nextelection
bt.corpus$dist_to_next_election_months <- mondf(bt.corpus$months, bt.corpus$nextelection)
bt.corpus$log_dist_to_next_election_months <- -log(bt.corpus$dist_to_next_election_months)
bt.corpus$country_name <- "Germany"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
bt.corpus <- bt.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
bt.corpus$last_year_before_election <- ifelse(bt.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_germany <- corpus(bt.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_germany, file = "5_corpus_germany.RData")
rm(bt.corpus, corpus_germany)
rm(party_names, code, cabinets, k, earlier, party_code, is_party, in_cabinet,
   leads_cabinet, election, election_day)

###############################################
# Czech Republic
###############################################

# Build the Czech corpus from Corp_PSP.Rdata (the file is expected to supply
# the data frame `psp.corpus`).
load("Corp_PSP.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
psp.corpus$date <- as.Date(psp.corpus$date)
psp.corpus$speechnumber <- as.character(psp.corpus$speechnumber)
psp.corpus$months <- as.Date(paste0(format(psp.corpus$date, "%Y-%m"), "-01"))

# Parties are addressed by their position in the sorted table of party names,
# taken before any filtering.
# NOTE(review): these positions depend on the exact set of party names in the
# data and on how table() sorts them in the current locale - confirm they
# still point at the intended parties when data or locale change.
tab <- table(psp.corpus$party)
psp.corpus <- psp.corpus %>% filter(chair == FALSE)
dropped_parties <- names(tab)[c(2, 3, 6, 11, 13, 14, 17, 18)]
psp.corpus <- psp.corpus %>% filter(!party %in% dropped_parties)

# Sorted names of the parties that remain; position k here pairs with label k.
tab2 <- table(psp.corpus$party)
parties.czech <- names(tab2)
czech_labels <- c(
  "Action of Dissatisfied Citizens",
  "Czech Social Democratic Party",
  "Movement for Self-Governing Democracy -- Society for Moravia and Silesia",
  "Christian Democratic Party",
  "Christian Democratic Union -- People's Party",
  "Communist Party of Bohemia and Moravia",
  "Left Bloc",
  "Liberal Social Union",
  "Civic Democratic Alliance",
  "Civic Democratic Party",
  "Rally for the Republic -- Republican Party of Czechoslovakia",
  "Green Party",
  "Tradition Responsibility Prosperity 09",
  "Tradition Responsibility Prosperity 09",
  "Freedom Union",
  "Freedom Union -- Democratic Union",
  "Dawn of Direct Democracy",
  "Public Affairs"
)
psp.corpus$party.label <- NA
for (k in seq_along(czech_labels)) {
  psp.corpus$party.label[psp.corpus$party == parties.czech[k]] <- czech_labels[k]
}
table(psp.corpus$party.label, exclude = NULL)

# Replace selected party names by the short codes used in the cabinet strings
# below. Columns: position in parties.czech, new code.
recodes <- rbind(
  c("2", "CSSD"),
  c("5", "KDU-CSL"),
  c("6", "KSCM"),
  c("11", "SPR-RSC"),
  c("13", "TOP 09 a Starostove"),
  c("17", "Usvit")
)
for (k in seq_len(nrow(recodes))) {
  old_name <- parties.czech[as.integer(recodes[k, 1])]
  psp.corpus$party[psp.corpus$party == old_name] <- recodes[k, 2]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party ("none" where no party is recorded). Each row is written to
# every speech dated before its cutoff, so rows further down overwrite the
# ones above for older speeches. The values assigned first apply to speeches
# on or after the first cutoff. The trailing blank in the last cutoff string
# is kept as in the original data entry.
psp.corpus$Gov_Parties <- "CSSD, ANO, KDU-CSL"
psp.corpus$Prime_Minister <- "CSSD"
cabinets <- rbind(
  c("2014/01/17", "none", "none"),
  c("2013/07/10", "ODS, TOP09, TOP 09 a Starostove", "ODS"),
  c("2012/04/27", "ODS, TOP09, TOP 09 a Starostove, VV", "ODS"),
  c("2010/06/28", "none", "none"),
  c("2009/04/09", "ODS, KDU-CSL, SZ", "ODS"),
  c("2007/01/09", "ODS", "ODS"),
  c("2006/08/16", "CSSD, KDU-CSL, US-DEU", "CSSD"),
  c("2002/07/15", "CSSD", "CSSD"),
  c("1998/07/17", "KDU-CSL, ODA, US", "none"),
  c("1998/01/02", "ODS, KDU-CSL, ODA", "ODS"),
  c("1996/07/02 ", "ODS, KDU-CSL, ODA, KDS", "ODS")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- psp.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  psp.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  psp.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties, so a code
# that is a substring of another listed code also matches.
# Role is initialised explicitly, as in the Spain and Finland sections, so that
# rows matched by no rule are NA and a Role column already present in the
# loaded data cannot leak through.
psp.corpus$Role <- NA
for (party_code in unique(psp.corpus$party)) {
  is_party <- psp.corpus$party == party_code
  in_cabinet <- grepl(party_code, psp.corpus$Gov_Parties)
  leads_cabinet <- psp.corpus$Prime_Minister == party_code
  psp.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  psp.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  psp.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
psp.corpus$nextelection <- as.Date("2021/10/21", "%Y/%m/%d")
for (election in c("2017/10/21", "2013/10/25", "2010/05/29", "2006/06/03",
                   "2002/06/15", "1998/06/20", "1996/06/01")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  psp.corpus$nextelection[psp.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
psp.corpus$time_to_next_election <- psp.corpus$date - psp.corpus$nextelection
psp.corpus$dist_to_next_election_months <- mondf(psp.corpus$months, psp.corpus$nextelection)
psp.corpus$log_dist_to_next_election_months <- -log(psp.corpus$dist_to_next_election_months)
psp.corpus$country_name <- "Czech Republic"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
psp.corpus <- psp.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
psp.corpus$last_year_before_election <- ifelse(psp.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_czech <- corpus(psp.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_czech, file = "5_corpus_czech.RData")
rm(psp.corpus, corpus_czech)
rm(dropped_parties, czech_labels, recodes, old_name, cabinets, k, earlier,
   party_code, is_party, in_cabinet, leads_cabinet, election, election_day)

###############################################
# Sweden
###############################################

# Build the Swedish corpus from Corp_Riksdag.Rdata (the file is expected to
# supply the data frame `rd.corpus`).
load("Corp_Riksdag.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
rd.corpus$date <- as.Date(rd.corpus$date)
rd.corpus$speechnumber <- as.character(rd.corpus$speechnumber)
rd.corpus$months <- as.Date(paste0(format(rd.corpus$date, "%Y-%m"), "-01"))

# Keep only rows where chair is FALSE and the party is one of the listed codes.
rd.corpus <- rd.corpus %>%
  filter(chair == FALSE) %>%
  filter(party %in% c("C", "FP", "KD", "L", "M", "MP", "NYD", "S", "SD", "V"))

# Readable party names keyed by the party code used in the data.
party_names <- c(
  "C" = "Centre Party",
  "FP" = "Liberal People's Party",
  "KD" = "Christian Democrats",
  "L" = "Liberals",
  "M" = "Moderates",
  "MP" = "Green Party",
  "NYD" = "New Democracy",
  "S" = "Social Democratic Party",
  "SD" = "Sweden Democrats",
  "V" = "Left Party"
)
rd.corpus$party.label <- NA
for (code in names(party_names)) {
  rd.corpus$party.label[rd.corpus$party == code] <- party_names[[code]]
}

# Recode the two one-letter codes to the longer codes used in the cabinet
# strings below; the role loop matches party codes as patterns inside
# Gov_Parties, where a bare "M" or "S" would also be found inside other codes.
rd.corpus$party[rd.corpus$party == "M"] <- "MSP"
rd.corpus$party[rd.corpus$party == "S"] <- "SAP"

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
rd.corpus$Gov_Parties <- "SAP, MP"
rd.corpus$Prime_Minister <- "SAP"
cabinets <- rbind(
  c("2014/10/02", "MSP, FP, C, KD", "MSP"),
  c("2006/10/05", "SAP", "SAP"),
  c("1994/10/06", "MSP, FP, C, KD", "MSP"),
  c("1991/10/03", "SAP", "SAP")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- rd.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  rd.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  rd.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
# Role is initialised explicitly, as in the Spain and Finland sections, so that
# rows matched by no rule are NA and a Role column already present in the
# loaded data cannot leak through.
rd.corpus$Role <- NA
for (party_code in unique(rd.corpus$party)) {
  is_party <- rd.corpus$party == party_code
  in_cabinet <- grepl(party_code, rd.corpus$Gov_Parties)
  leads_cabinet <- rd.corpus$Prime_Minister == party_code
  rd.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  rd.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  rd.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
rd.corpus$nextelection <- as.Date("2018/09/09", "%Y/%m/%d")
for (election in c("2014/09/14", "2010/09/19", "2006/09/17", "2002/09/15",
                   "1998/09/20", "1994/09/18", "1991/09/15")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  rd.corpus$nextelection[rd.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
rd.corpus$time_to_next_election <- rd.corpus$date - rd.corpus$nextelection
rd.corpus$dist_to_next_election_months <- mondf(rd.corpus$months, rd.corpus$nextelection)
rd.corpus$log_dist_to_next_election_months <- -log(rd.corpus$dist_to_next_election_months)
rd.corpus$country_name <- "Sweden"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
rd.corpus <- rd.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
rd.corpus$last_year_before_election <- ifelse(rd.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_sweden <- corpus(rd.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_sweden, file = "5_corpus_sweden.RData")
rm(rd.corpus, corpus_sweden)
rm(party_names, code, cabinets, k, earlier, party_code, is_party, in_cabinet,
   leads_cabinet, election, election_day)

###############################################
# Netherlands
###############################################

# Build the Dutch corpus from Corp_TweedeKamer.Rdata (the file is expected to
# supply the data frame `tk.corpus`).
load("Corp_TweedeKamer.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
tk.corpus$date <- as.Date(tk.corpus$date)
tk.corpus$speechnumber <- as.character(tk.corpus$speechnumber)
tk.corpus$months <- as.Date(paste0(format(tk.corpus$date, "%Y-%m"), "-01"))

# Keep only rows where chair is FALSE and drop the "other" party category.
tk.corpus <- tk.corpus %>%
  filter(chair == FALSE) %>%
  filter(party != "other")

# Readable party names keyed by the party code used in the data; codes that
# are not listed keep a missing label.
party_names <- c(
  "CDA" = "Christian Democratic Appeal",
  "CU" = "ChristianUnion",
  "D66" = "Democrats 66",
  "GL" = "GreenLeft",
  "GPV" = "Reformed Political League",
  "LPF" = "Fortuyn List",
  "PvdA" = "Labour Party",
  "PvdD" = "Party for the Animals",
  "PVV" = "Party for Freedom",
  "RPF" = "Reformatory Political Federation",
  "SGP" = "Political Reformed Party",
  "SP" = "Socialist Party",
  "VVD" = "People's Party for Freedom and Democracy"
)
tk.corpus$party.label <- NA
for (code in names(party_names)) {
  tk.corpus$party.label[tk.corpus$party == code] <- party_names[[code]]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
# Both columns share one cutoff per cabinet. The previous code used 2003/07/22
# for the prime minister of the "PvdA, VVD, D66" cabinet while its parties
# used 2002/07/22; that reset the prime minister to "PvdA" for all speeches
# before 2003/07/22, so from 2002/07/22 until that date the CDA-led cabinets
# listed above had CDA classified as "Junior Coalition Partner".
tk.corpus$Gov_Parties <- "VVD, PvdA"
tk.corpus$Prime_Minister <- "VVD"
cabinets <- rbind(
  c("2012/11/05", "VVD, CDA", "VVD"),
  c("2010/10/14", "CDA, CU", "CDA"),
  c("2010/02/23", "CDA, PvdA, CU", "CDA"),
  c("2007/02/22", "CDA, VVD", "CDA"),
  c("2006/07/07", "CDA, VVD, D66", "CDA"),
  c("2003/05/27", "CDA, LPF, VVD", "CDA"),
  c("2002/07/22", "PvdA, VVD, D66", "PvdA"),
  c("1994/08/22", "CDA, PvdA", "CDA")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- tk.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  tk.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  tk.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
# Role is initialised explicitly, as in the Spain and Finland sections, so that
# rows matched by no rule are NA and a Role column already present in the
# loaded data cannot leak through.
tk.corpus$Role <- NA
for (party_code in unique(tk.corpus$party)) {
  is_party <- tk.corpus$party == party_code
  in_cabinet <- grepl(party_code, tk.corpus$Gov_Parties)
  leads_cabinet <- tk.corpus$Prime_Minister == party_code
  tk.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  tk.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  tk.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
tk.corpus$nextelection <- as.Date("2021/03/15", "%Y/%m/%d")
for (election in c("2017/03/15", "2012/09/12", "2010/06/09", "2006/11/22",
                   "2003/01/22", "2002/05/15", "1998/05/06", "1994/05/03")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  tk.corpus$nextelection[tk.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
tk.corpus$time_to_next_election <- tk.corpus$date - tk.corpus$nextelection
tk.corpus$dist_to_next_election_months <- mondf(tk.corpus$months, tk.corpus$nextelection)
tk.corpus$log_dist_to_next_election_months <- -log(tk.corpus$dist_to_next_election_months)
tk.corpus$country_name <- "Netherlands"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
tk.corpus <- tk.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
tk.corpus$last_year_before_election <- ifelse(tk.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_netherlands <- corpus(tk.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_netherlands, file = "5_corpus_netherlands.RData")
rm(tk.corpus, corpus_netherlands)
rm(party_names, code, cabinets, k, earlier, party_code, is_party, in_cabinet,
   leads_cabinet, election, election_day)

###############################################
# United Kingdom
###############################################

# Build the UK corpus from Corp_HouseOfCommons.Rdata (the file is expected to
# supply the data frame `hoc.corpus`).
load("Corp_HouseOfCommons.Rdata")

# Normalise column types; `months` is the first day of each speech's month.
hoc.corpus$date <- as.Date(hoc.corpus$date)
hoc.corpus$speechnumber <- as.character(hoc.corpus$speechnumber)
hoc.corpus$months <- as.Date(paste0(format(hoc.corpus$date, "%Y-%m"), "-01"))

# Keep only rows where chair is FALSE and drop the excluded party codes.
hoc.corpus <- hoc.corpus %>%
  filter(chair == FALSE) %>%
  filter(!party %in% c("other", "APNI", "Referendum", "UKUP"))

# Readable party names keyed by the party code used in the data; codes that
# are not listed keep a missing label.
party_names <- c(
  "Con" = "Conservatives",
  "DUP" = "Democratic Unionist Party",
  "GPEW" = "Green Party",
  "Lab" = "Labour",
  "LibDem" = "Liberal Democrats",
  "PlaidCymru" = "Plaid Cymru",
  "Respect" = "Respect -- The Unity Coalition",
  "SDLP" = "Social Democratic and Labour Party",
  "SDP" = "Social Democratic Party",
  "SNP" = "Scottish National Party",
  "UKIP" = "United Kingdom Independence Party",
  "UPUP" = "Ulster Popular Unionist Party",
  "UUP" = "Ulster Unionist Party"
)
hoc.corpus$party.label <- NA
for (code in names(party_names)) {
  hoc.corpus$party.label[hoc.corpus$party == code] <- party_names[[code]]
}

# Cabinets, newest first. Columns: cutoff date, governing parties, prime
# minister's party. Each row is written to every speech dated before its
# cutoff, so rows further down overwrite the ones above for older speeches.
# The values assigned first apply to speeches on or after the first cutoff.
hoc.corpus$Gov_Parties <- "Con"
hoc.corpus$Prime_Minister <- "Con"
cabinets <- rbind(
  c("2015/05/08", "Con, LibDem", "Con"),
  c("2010/05/11", "Lab", "Lab"),
  c("1997/05/02", "Con", "Con")
)
for (k in seq_len(nrow(cabinets))) {
  earlier <- hoc.corpus$date < as.Date(cabinets[k, 1], "%Y/%m/%d")
  hoc.corpus$Gov_Parties[earlier] <- cabinets[k, 2]
  hoc.corpus$Prime_Minister[earlier] <- cabinets[k, 3]
}

# Role of the speaker's party at the time of the speech. A party counts as
# governing when its code is found (as a pattern) in Gov_Parties.
# Role is initialised explicitly, as in the Spain and Finland sections, so that
# rows matched by no rule are NA and a Role column already present in the
# loaded data cannot leak through.
hoc.corpus$Role <- NA
for (party_code in unique(hoc.corpus$party)) {
  is_party <- hoc.corpus$party == party_code
  in_cabinet <- grepl(party_code, hoc.corpus$Gov_Parties)
  leads_cabinet <- hoc.corpus$Prime_Minister == party_code
  hoc.corpus$Role[is_party & in_cabinet & leads_cabinet] <- "Prime Minister Party"
  hoc.corpus$Role[is_party & in_cabinet & !leads_cabinet] <- "Junior Coalition Partner"
  hoc.corpus$Role[is_party & !in_cabinet] <- "Opposition"
}

# Next election after each speech: start from the latest date and walk back
# through the election dates, newest first, so every speech ends up with the
# earliest election date that lies after it.
hoc.corpus$nextelection <- as.Date("2021/06/08", "%Y/%m/%d")
for (election in c("2017/06/08", "2015/05/07", "2010/05/06", "2005/05/05",
                   "2001/06/07", "1997/05/01", "1992/04/09")) {
  election_day <- as.Date(election, "%Y/%m/%d")
  hoc.corpus$nextelection[hoc.corpus$date < election_day] <- election_day
}

# Distance to the next election: in days (speech date minus election date),
# in calendar months, and as the negated log of the month distance.
hoc.corpus$time_to_next_election <- hoc.corpus$date - hoc.corpus$nextelection
hoc.corpus$dist_to_next_election_months <- mondf(hoc.corpus$months, hoc.corpus$nextelection)
hoc.corpus$log_dist_to_next_election_months <- -log(hoc.corpus$dist_to_next_election_months)
hoc.corpus$country_name <- "United Kingdom"

# Attach the minority-government data by date and country, then flag speeches
# given fewer than 13 months before the next election.
hoc.corpus <- hoc.corpus %>%
  left_join(y = min_govs, by = c("date", "country_name"))
hoc.corpus$last_year_before_election <- ifelse(hoc.corpus$dist_to_next_election_months < 13, 1, 0)

# Convert to a quanteda corpus, store it, and clean up.
corpus_uk <- corpus(hoc.corpus, text_field = "text", docid_field = "speechnumber")
save(corpus_uk, file = "5_corpus_uk.RData")
rm(hoc.corpus, corpus_uk)
rm(party_names, code, cabinets, k, earlier, party_code, is_party, in_cabinet,
   leads_cabinet, election, election_day)

